Analysis of ARPAV Temperature Data

library(tidyverse)
library(R2jags)
library(bayesplot)

global_seed <- 30172
set.seed(global_seed)

global_font <- "Roboto Condensed"
theme_set(theme_minimal(base_size = 15, base_family = global_font))
my_pal <- c("#EE9C39", "#20A47B", "#5A3920")
loc_path <- "./data/temp/malo"

temp_data <- NULL
for (type in c("min", "avg", "max")) {
    file <- list.files(
        path = loc_path,
        full.names = TRUE,
        pattern = paste0("*", type, ".csv")
    )

    tmp_df <- read.csv(file, sep = ";", na.strings = ">>")
    tmp_df <- tmp_df[-ncol(tmp_df)]
    names(tmp_df) <- c("year", 1:12)

    tmp_df <- tmp_df |>
        pivot_longer(-year, names_to = "month", values_to = "value") |>
        add_column(variable = type, .after = "month") |>
        mutate(
            date = make_date(year = year, month = month), .before = 1
        ) |>
        mutate(month = factor(
            month.abb[as.integer(month)], levels = month.abb)
        )

    temp_data <- rbind(temp_data, tmp_df)
}

temp_data <- arrange(temp_data, date) |>
    pivot_wider(names_from = "variable", values_from = "value") |>
    na.omit()
years <- unique(temp_data$year)

baseline <- temp_data |>
    filter(year <= years[5]) |>
    group_by(month) |>
    summarize(min = mean(min), avg = mean(avg), max = mean(max))

temp_data |>
    inner_join(baseline, by = "month") |>
    mutate(avg = avg.x - avg.y) |>
    ggplot() +
        geom_tile(aes(x = year, y = fct_rev(month), fill = avg)) +
        scale_x_continuous(
            breaks = scales::pretty_breaks(),
            expand = c(0, 0)
        ) +
        scale_fill_gradientn(
            colours = wesanderson::wes_palette("Zissou1", 10, "continuous")
        ) +
        labs(
            x = "Year", y = "Month", fill = "Deviation (°C)",
            title = paste0(
                "Deviation of the average monthly ",
                "temperature w.r.t. the ",
                years[1], "-", years[5], " period"
            )
        )

temp_data |>
    inner_join(baseline, by = "month") |>
    mutate(diff = (max.x - min.x) - (max.y - min.y)) |>
    ggplot() +
        geom_tile(aes(x = year, y = fct_rev(month), fill = diff)) +
        scale_x_continuous(
            breaks = scales::pretty_breaks(),
            expand = c(0, 0)
        ) +
        scale_fill_gradientn(
            colours = wesanderson::wes_palette("Zissou1", 20, "continuous")
        ) +
        labs(
            x = "Year", y = "Month", fill = "Deviation (°C)",
            title = paste0(
                "Deviation of the average monthly thermal ",
                "excursion w.r.t. the ",
                years[1], "-", years[5], " period"
            )
        )

smoothed <- temp_data |>
    group_by(year) |>
    summarize(temp = mean(avg))

# standard linear fit to get prior parameters
lm_fit <- lm(temp ~ year, data = smoothed)
lm_params <- summary(lm_fit)$coefficients[, 1]
lm_errs <- summary(lm_fit)$coefficients[, 2]
model <- sprintf("
    model {
        for (i in 1:length(year)) {
            mu[i] <- a + b * year[i]
            temp[i] ~ dnorm(mu[i], tau)
        }
        
        a ~ dnorm(%f, %f)
        b ~ dnorm(%f, %f)
        sigma ~ dexp(0.001)
        tau <- 1 / sigma^2
    }",
    lm_params[1], 1 / lm_errs[1]^2,
    lm_params[2], 1 / lm_errs[2]^2
)

pars <- c("a", "b", "sigma")

chain <- R2jags::jags(
    data = smoothed,
    inits = function() list(a = 0, b = 0, sigma = 1),
    parameters.to.save = pars,
    model.file = textConnection(model),
    n.chains = 3,
    n.iter = 100000,
    n.burnin = 2000,
    n.thin = 10,
    DIC = FALSE
) |> as.mcmc()
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 29
##    Unobserved stochastic nodes: 3
##    Total graph size: 129
## 
## Initializing model
trace_plt <- mcmc_trace(
    chain, pars = pars,
    facet_args = list(nrow = 3, labeller = label_parsed)
) +
    facet_text(size = 15) +
    labs(title = "Traces") +
    bayesplot::theme_default(base_size = 15, base_family = global_font)

dens_plt <- mcmc_dens_overlay(
    chain, pars = pars,
    facet_args = list(nrow = 3, labeller = label_parsed)
) +
    facet_text(size = 15) +
    labs(title = "Densities") +
    bayesplot::theme_default(base_size = 15, base_family = global_font)

gridExtra::grid.arrange(trace_plt, dens_plt, ncol = 2)